In [1]:
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import vivarium, vivarium.test_util as vtu
from vivarium.framework.event import listens_for
from vivarium.framework.population import uses_columns
import json
import random

#from reports_header import *
%matplotlib inline
from IPython.display import set_matplotlib_formats
plt.rcParams['figure.autolayout'] = False
plt.rcParams['figure.figsize'] = 12, 6
plt.rcParams['axes.labelsize'] = 18
plt.rcParams['axes.titlesize'] = 20
plt.rcParams['font.size'] = 16
plt.rcParams['lines.linewidth'] = 2.0
plt.rcParams['lines.markersize'] = 8
plt.rcParams['legend.fontsize'] = 14

plt.rcParams['text.usetex'] = True
plt.rcParams['font.family'] = "serif"
plt.rcParams['font.serif'] = "cm"

In [2]:
# Make the project's ../src directory importable from this notebook.
import os
import sys
module_path = os.path.abspath(os.path.join('../src'))
if module_path not in sys.path:
    sys.path.append(module_path)
# Cohort-analysis helpers used below (get_first_visit_date, status_patient).
import models.cohort_analysis_function as caf

In [3]:
# Simulation parameters.
n_simulants = 5000   # cohort size
n_days = 365         # length of the simulated follow-up period, in days
t_start = pd.Timestamp(year=2017, month=1, day=1)  # simulation start date
t_timestep = 1       # time-step length, in days
#with open('../data/processed/p_return_by_fac.json') as data_file:    
#    return_rates = json.load(data_file)

In [4]:
# Facility-level return probabilities, indexed by (visit delay, facility).
fac_return_rates = pd.read_csv('../data/processed/p_return_by_fac.csv',
                               names=['time', 'facility', 'proba'])
facilities_list = fac_return_rates['facility'].unique().tolist()
fac_return_rates = fac_return_rates.set_index(['time', 'facility'])

In [18]:
# Spot-check: look up return probabilities for a sample of
# (delay, facility) key pairs in the MultiIndexed probability table.
fac_return_rates.proba.loc[[(-28.0, 53132.0), (-28.0, 32207.0), (-28.0, 52101.0), (-28.0, 13119.0), (-27.0, 51106.0), (-28.0, 95698.0), (-28.0, 31100.0), (-28.0, 41201.0), (-28.0, 37201.0), (-27.0, 32207.0), (-28.0, 71130.0), (-28.0, 43101.0), (-27.0, 11450.0), (-27.0, 11316.0), (-28.0, 11221.0), (-27.0, 11229.0), (-27.0, 51109.0), (-28.0, 73103.0)]]


Out[18]:
time  facility
-28   53132.0     0.037536
      32207.0     0.048292
      52101.0     0.029757
      13119.0     0.034560
-27   51106.0     0.001188
-28   95698.0     0.098039
      31100.0     0.020479
      41201.0     0.039566
      37201.0     0.019694
-27   32207.0     0.007200
-28   71130.0     0.099237
      43101.0     0.008286
-27   11450.0     0.160000
      11316.0     0.005577
-28   11221.0     0.034645
-27   11229.0     0.002451
      51109.0     0.008283
-28   73103.0     0.038689
Name: proba, dtype: float64

Simulate the cohort


In [19]:
class HIVCohortFollowUp:
    """Vivarium component simulating follow-up visits of an HIV cohort.

    Simulants enter care at a random first-visit date during the first
    year, are scheduled for a new appointment every 4 weeks, and return
    with a probability that depends on (visit delay, facility), looked up
    in ``fac_return_rates``.  Every simulated visit is appended to
    ``current_database``; daily state counts are stored in ``reports``.
    """

    def __init__(self, fac_return_rates):
        # reports: {timestamp -> value_counts of simulant states that day}
        self.reports = {}
        # Series of return probabilities indexed by (time delay, facility).
        self.return_rates = fac_return_rates
        # Running register of every simulated visit.
        self.current_database = pd.DataFrame(
            {'visit_date': [], 'next_visit_date': [], 'facility': []})

    @listens_for('initialize_simulants')
    @uses_columns(['state', 'next_visit_date', 'first_visit', 'follow_up_duration', 'facility'])
    def initialize(self, event):
        """Create the simulant population with random facilities and first visits."""
        # Use the event's own index size rather than the module-level
        # n_simulants, so the component works for any population_size.
        n = len(event.index)
        population = pd.DataFrame(index=event.index)
        population['state'] = 'Out'  # TODO: add Dead, Transferred, Other states
        population['next_visit_date'] = pd.NaT
        population['follow_up_duration'] = 0
        # NOTE(review): relies on the module-level `facilities_list` global.
        population['facility'] = random.choices(facilities_list, k=n)

        # First visit uniformly distributed over the first simulated year.
        first_visit = np.round(np.random.uniform(0, 365, n))
        population['first_visit'] = t_start + pd.to_timedelta(first_visit, 'D')

        # First follow-up appointment is 4 weeks after the first visit.
        # (A dead, unused binomial draw from the original was removed here.)
        population['next_visit_date'] = population['first_visit'] + pd.Timedelta(days=7 * 4)

        # Simulants whose first visit falls on the start date begin followed.
        population.loc[population['first_visit'] == t_start, 'state'] = 'Followed'

        # Push the initialized columns into the simulation's population table.
        event.population_view.update(population)

    @listens_for('time_step')
    @uses_columns(['state', 'first_visit', 'next_visit_date', 'facility'], 'state == "Out"')
    def new_patients(self, event):
        """Enroll simulants whose first visit is today and log that visit."""
        df = event.population
        is_new = df['first_visit'] == event.time  # compute the mask once
        df.loc[is_new, 'state'] = 'Followed'
        df.loc[is_new, 'next_visit_date'] = event.time + pd.Timedelta(days=7 * 4)
        n_new = int(is_new.sum())
        # Record the enrollment visits in the running database.
        self.current_database = self.current_database.append(pd.DataFrame(
            {'visit_date': [event.time] * n_new,
             'next_visit_date': df.loc[is_new, 'next_visit_date'],
             'facility': df.loc[is_new, 'facility']},
            index=df.index[is_new]))

        event.population_view.update(df)

    @listens_for('time_step')
    @uses_columns(['state', 'next_visit_date', 'facility'], 'state != "Out"')
    def next_appointment(self, event):
        """Draw today's returning visitors and reschedule their next visit."""
        df = event.population
        # Delay in days relative to the scheduled appointment (negative when
        # today is before the appointment).  Kept as a local Series: the
        # original `df.visit_delay = ...` set a DataFrame *attribute*, not a
        # real column.
        visit_delay = (event.time - df['next_visit_date']) / np.timedelta64(1, 'D')

        # Per-simulant (delay, facility) keys into the return-rate table;
        # unknown combinations get probability 0.
        draw_keys = list(zip(visit_delay, df['facility']))
        probas = list(self.return_rates.proba.loc[draw_keys].fillna(0))
        visit = np.random.binomial(1, probas, size=len(df))
        visitors = list(df.index[visit == 1])
        n_visitors = len(visitors)

        # Returning simulants get a new appointment 4 weeks out.
        df.loc[visitors, 'next_visit_date'] = event.time + pd.Timedelta(days=7 * 4)
        if n_visitors > 0:
            self.current_database = self.current_database.append(pd.DataFrame(
                {'visit_date': [event.time] * n_visitors,
                 'next_visit_date': df.loc[visitors, 'next_visit_date'],
                 'facility': df.loc[visitors, 'facility']},
                index=list(df.loc[visitors].index)))

        event.population_view.update(df)

    @listens_for('time_step')
    @uses_columns(['state'], 'state != "Out"')
    def export_report_datapoint(self, event):
        """Store the distribution of simulant states at this time step."""
        date = event.time
        self.reports[date] = event.population.state.value_counts()

In [20]:
# Build and run the simulation: one year of daily time steps over the cohort.
components = [HIVCohortFollowUp(fac_return_rates)]
vtu.config.simulation_parameters.time_step = 1/365
simulation = vtu.setup_simulation(components, population_size=n_simulants, start=t_start)
vtu.pump_simulation(simulation, time_step_days=t_timestep, duration=pd.Timedelta(days=n_days))


23
23
35
35
48
48
60
60
69
69
83
83
93
93
109
109
124
124
138
138
151
151
162
162
182
182
193
193
200
200
216
216
225
225
238
238
248
248
263
263
275
275
289
289
301
301
308
308
323
323
337
337
354
354
370
370
382
382
403
403
421
421
441
441
454
454
464
464
477
477
496
496
505
505
519
519
532
532
552
552
565
565
581
581
596
596
606
606
618
618
628
628
641
641
652
652
668
668
681
681
699
699
708
708
720
720
735
735
758
758
767
767
783
783
793
793
807
807
818
818
834
834
848
848
863
863
873
873
880
880
893
893
906
906
924
924
934
934
952
952
963
963
969
969
986
986
1005
1005
1019
1019
1029
1029
1041
1041
1053
1053
1064
1064
1074
1074
1089
1089
1102
1102
1114
1114
1127
1127
1141
1141
1157
1157
1169
1169
1183
1183
1198
1198
1211
1211
1225
1225
1241
1241
1265
1265
1282
1282
1290
1290
1308
1308
1322
1322
1334
1334
1344
1344
1360
1360
1377
1377
1385
1385
1396
1396
1410
1410
1426
1426
1442
1442
1460
1460
1478
1478
1489
1489
1503
1503
1515
1515
1529
1529
1544
1544
1561
1561
1579
1579
1599
1599
1612
1612
1625
1625
1637
1637
1646
1646
1657
1657
1669
1669
1677
1677
1687
1687
1694
1694
1704
1704
1714
1714
1725
1725
1734
1734
1747
1747
1762
1762
1772
1772
1779
1779
1796
1796
1810
1810
1819
1819
1830
1830
1840
1840
1856
1856
1867
1867
1889
1889
1908
1908
1927
1927
1941
1941
1952
1952
1965
1965
1983
1983
1996
1996
2006
2006
2028
2028
2040
2040
2052
2052
2061
2061
2075
2075
2085
2085
2099
2099
2117
2117
2130
2130
2155
2155
2170
2170
2181
2181
2192
2192
2209
2209
2219
2219
2226
2226
2241
2241
2252
2252
2263
2263
2277
2277
2290
2290
2300
2300
2310
2310
2324
2324
2336
2336
2355
2355
2369
2369
2382
2382
2394
2394
2418
2418
2434
2434
2440
2440
2457
2457
2469
2469
2481
2481
2495
2495
2513
2513
2526
2526
2546
2546
2561
2561
2573
2573
2581
2581
2595
2595
2613
2613
2625
2625
2638
2638
2650
2650
2659
2659
2672
2672
2694
2694
2703
2703
2716
2716
2731
2731
2742
2742
2755
2755
2771
2771
2782
2782
2801
2801
2816
2816
2830
2830
2843
2843
2856
2856
2872
2872
2882
2882
2902
2902
2918
2918
2938
2938
2955
2955
2963
2963
2972
2972
2984
2984
3005
3005
3015
3015
3031
3031
3041
3041
3054
3054
3071
3071
3086
3086
3099
3099
3115
3115
3131
3131
3143
3143
3156
3156
3167
3167
3181
3181
3194
3194
3214
3214
3226
3226
3244
3244
3255
3255
3270
3270
3289
3289
3306
3306
3317
3317
3330
3330
3349
3349
3355
3355
3372
3372
3393
3393
3403
3403
3416
3416
3428
3428
3442
3442
3453
3453
3467
3467
3488
3488
3503
3503
3512
3512
3527
3527
3543
3543
3555
3555
3568
3568
3582
3582
3601
3601
3610
3610
3620
3620
3634
3634
3643
3643
3660
3660
3683
3683
3692
3692
3703
3703
3714
3714
3729
3729
3744
3744
3762
3762
3780
3780
3794
3794
3805
3805
3826
3826
3838
3838
3851
3851
3869
3869
3886
3886
3897
3897
3915
3915
3924
3924
3938
3938
3956
3956
3965
3965
3975
3975
3987
3987
4005
4005
4012
4012
4026
4026
4033
4033
4046
4046
4059
4059
4077
4077
4085
4085
4101
4101
4117
4117
4132
4132
4150
4150
4162
4162
4177
4177
4188
4188
4201
4201
4213
4213
4228
4228
4242
4242
4253
4253
4266
4266
4277
4277
4291
4291
4304
4304
4320
4320
4332
4332
4342
4342
4353
4353
4364
4364
4379
4379
4394
4394
4410
4410
4425
4425
4443
4443
4458
4458
4475
4475
4485
4485
4500
4500
4509
4509
4526
4526
4537
4537
4550
4550
4559
4559
4571
4571
4582
4582
4596
4596
4610
4610
4622
4622
4643
4643
4659
4659
4674
4674
4684
4684
4697
4697
4708
4708
4723
4723
4735
4735
4749
4749
4767
4767
4786
4786
4799
4799
4815
4815
4823
4823
4839
4839
4850
4850
4870
4870
4879
4879
4895
4895
4909
4909
4921
4921
4937
4937
4954
4954
4969
4969
4988
4988
5000
5000
Out[20]:
365

Simulate Data Entry


In [21]:
# Snapshot the simulated visit database from the component.
full_database = components[0].current_database.copy()

# Heavy-tailed per-visit data-entry delays (in days): Weibull(k=0.4) scaled
# by 40, rounded to whole days.  No seed is set, so draws vary between runs.
raw_delays = np.random.weibull(.4, len(full_database))
times = np.round(raw_delays * 40)
print(np.median(times))
plt.hist(times, bins=10);


16.0

In [22]:
# Inspect the accumulated visit database (patient-indexed visit records).
full_database


Out[22]:
facility next_visit_date visit_date
124 81102.0 2017-01-30 2017-01-02
157 41100.0 2017-01-30 2017-01-02
392 35201.0 2017-01-30 2017-01-02
436 35101.0 2017-01-30 2017-01-02
708 71301.0 2017-01-30 2017-01-02
788 31101.0 2017-01-30 2017-01-02
2799 31102.0 2017-01-30 2017-01-02
2819 32207.0 2017-01-30 2017-01-02
3072 13199.0 2017-01-30 2017-01-02
3149 13120.0 2017-01-30 2017-01-02
4043 53132.0 2017-01-30 2017-01-02
4143 54101.0 2017-01-30 2017-01-02
4508 12201.0 2017-01-30 2017-01-02
4765 11100.0 2017-01-30 2017-01-02
4792 11156.0 2017-01-30 2017-01-02
4864 11134.0 2017-01-30 2017-01-02
788 31101.0 2017-01-30 2017-01-02
354 61120.0 2017-01-31 2017-01-03
716 11230.0 2017-01-31 2017-01-03
921 93301.0 2017-01-31 2017-01-03
1870 11404.0 2017-01-31 2017-01-03
2091 11109.0 2017-01-31 2017-01-03
2700 11224.0 2017-01-31 2017-01-03
3339 71130.0 2017-01-31 2017-01-03
3838 83401.0 2017-01-31 2017-01-03
3956 93301.0 2017-01-31 2017-01-03
4017 11234.0 2017-01-31 2017-01-03
4665 54101.0 2017-01-31 2017-01-03
4841 43101.0 2017-01-31 2017-01-03
4665 54101.0 2017-01-31 2017-01-03
... ... ... ...
3612 71104.0 2018-01-29 2018-01-01
3622 11144.0 2018-01-29 2018-01-01
3626 13120.0 2018-01-29 2018-01-01
3772 44302.0 2018-01-29 2018-01-01
3839 12107.0 2018-01-29 2018-01-01
3908 11503.0 2018-01-29 2018-01-01
3919 71107.0 2018-01-29 2018-01-01
3954 51109.0 2018-01-29 2018-01-01
3986 82201.0 2018-01-29 2018-01-01
4065 15103.0 2018-01-29 2018-01-01
4074 61120.0 2018-01-29 2018-01-01
4129 51100.0 2018-01-29 2018-01-01
4156 14206.0 2018-01-29 2018-01-01
4166 21401.0 2018-01-29 2018-01-01
4194 11100.0 2018-01-29 2018-01-01
4216 52101.0 2018-01-29 2018-01-01
4396 51204.0 2018-01-29 2018-01-01
4449 11116.0 2018-01-29 2018-01-01
4520 71106.0 2018-01-29 2018-01-01
4570 35201.0 2018-01-29 2018-01-01
4575 54101.0 2018-01-29 2018-01-01
4587 12201.0 2018-01-29 2018-01-01
4616 11106.0 2018-01-29 2018-01-01
4669 11144.0 2018-01-29 2018-01-01
4728 33101.0 2018-01-29 2018-01-01
4813 11503.0 2018-01-29 2018-01-01
4879 93301.0 2018-01-29 2018-01-01
4925 12201.0 2018-01-29 2018-01-01
4963 11116.0 2018-01-29 2018-01-01
4967 11405.0 2018-01-29 2018-01-01

23151 rows × 3 columns


In [23]:
def create_data_entry_date(data, delta):
    """Attach data-entry dates to a visit database and collapse to first visits.

    Parameters
    ----------
    data : pd.DataFrame
        Visit-level database with 'visit_date' and 'facility' columns,
        indexed by patient id.
    delta : scalar or array-like
        Delay in days between each visit and its entry into the system.

    Returns
    -------
    pd.DataFrame
        Result of applying caf.get_first_visit_date per
        (facility, patient_id) group.
    """
    # Work on a copy so the caller's frame is not mutated in place
    # (the original modified `full_database` on every call).
    data = data.copy()
    data['delta'] = pd.to_timedelta(delta, 'days')
    data['date_entered'] = data.visit_date + data.delta
    # BUG FIX: use this frame's own index, not the module-level
    # `full_database`, so the function works for any input frame.
    data['patient_id'] = data.index
    data = data.reset_index()
    data = data.groupby(['facility', 'patient_id']).apply(caf.get_first_visit_date)
    return data

In [24]:
# Two data-entry scenarios: instantaneous entry (data1, "real") versus
# Weibull-delayed entry (data2, "observed").
data1 = create_data_entry_date(full_database, 0)
data2 = create_data_entry_date(full_database , times)

In [25]:
# Placeholder columns expected by the downstream reporting functions:
# no discontinuation dates or reasons are simulated in this model.
for frame in (data1, data2):
    frame['reasonDescEn'] = np.nan
    frame['discDate'] = pd.NaT

In [26]:
# Monthly report dates: the first of each month from Feb 2017 to Jan 2018.
months_of_interest = (pd.date_range('2017-02-01', periods=12, freq='MS')
                        .strftime('%Y-%m-%d')
                        .tolist())

In [27]:
def make_report_trail(data, months_of_interest):
    """Build a month-by-month table of patient-status counts.

    For each month, every patient's status is computed with
    caf.status_patient (90-day grace period); months with no patients
    are skipped.  Returns a DataFrame indexed by month with one column
    per status value.
    """
    reports = {}
    for month in months_of_interest:
        statuses = data.groupby(data.patient_id).apply(
            caf.status_patient, reference_date=month,
            analysis_date=month, grace_period=90)
        if len(statuses) == 0:
            continue
        reports[month] = statuses.status.value_counts()
    return pd.DataFrame.from_dict(reports, orient='index')

In [28]:
# Per-facility monthly reports under instantaneous data entry ("real").
report_1 = data1.groupby('facility').apply(make_report_trail, months_of_interest)

In [29]:
# Per-facility monthly reports under delayed data entry ("observed").
report_2 = data2.groupby('facility').apply(make_report_trail, months_of_interest)

In [30]:
# Side-by-side comparison: real vs. observed counts per (facility, month).
complete_reports = report_1.merge(report_2, 'left', left_index = True , right_index=True , 
                                  suffixes=(' real', ' observed'))

In [33]:
# 4x4 grid of per-facility report time series (first 16 facilities).
f, axarr = plt.subplots(4, 4, sharex=True)
facilities = list(complete_reports.index.levels[0])
for a in range(4):
    for b in range(4):
        # BUG FIX: the original used index 2*b + a, which only reaches
        # facilities 0-9 and plots several of them twice; 4*a + b maps
        # each of the 16 panels to a distinct facility.
        dat = complete_reports.loc[facilities[4 * a + b]]
        dat.index = pd.to_datetime(dat.index)
        axarr[a, b].plot(dat);



In [ ]: